suppressWarnings(library(Seurat))
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
suppressWarnings(library(scran))
suppressWarnings(library(scDblFinder))
suppressWarnings(library(tidyverse))
suppressWarnings(library(RColorBrewer))
suppressWarnings(library(openxlsx))
suppressWarnings(library(circlize))
suppressWarnings(library(gtools))
suppressWarnings(library(ggExtra))
suppressWarnings(library(ggridges))
suppressWarnings(library(patchwork))
projectDir <- "."
projectPath <- file.path(projectDir)
outputPath <- file.path(projectPath)
dataDir <- "data"
dataPath <- file.path(projectPath,dataDir)
prefix <- "Myeloid"
dir.create(file.path(outputPath),recursive = T)
setwd(outputPath)
color.list <- c("#ebac23", "#b80058", "#008cf9",
"#006e00", "#00bbad", "#d163e6",
"#b24502", "#ff9287", "#5954d6",
"#00c6f8", "#878500", "#00a76c",
"#bdbdbd", "#846b54",
brewer.pal(12, "Paired"),
brewer.pal(12, "Set3"),
brewer.pal(8, "Pastel2"),
colorRampPalette(c("grey20","grey70"))(4))
sc <- Read10X(file.path(dataPath,"myeloid.counts"))
sc <- CreateSeuratObject(
counts = sc,
assay = "RNA",
project = "Myeloid",
names.field = c(1,2),
names.delim = "_",
min.cells = 0,
min.features = 0
)
sc@meta.data$SampleName <- sc@meta.data$orig.ident
table(sc$SampleName)
CD11b_KO CD11b_WT
8931 11395
cellSet <- "Myeloid"
subsetName <- "Integrated"
nfeatures <- 2000
ress <- c(0.5)
npcs <- 20
bySample <- SplitObject(sc, split.by = "SampleName")
# Integration via CCA protocol
bySample <- lapply(bySample, function (x) {
x <- x %>% NormalizeData(verbose=F) %>%
FindVariableFeatures(nfeatures = nfeatures,
verbose=F) %>%
ScaleData(verbose=F) %>%
RunPCA(verbose=F)
return(x)
})
commonFeatures <- SelectIntegrationFeatures(bySample,
verbose=F)
smanchors <- FindIntegrationAnchors(bySample,
anchor.features = commonFeatures,
verbose=F)
| | 0 % ~calculating
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06m 18s
bySample.cca <- IntegrateData(anchorset = smanchors,
features.to.integrate = rownames(bySample[[1]]),
verbose=F)
DefaultAssay(bySample.cca) <- "integrated"
# Run the standard workflow for clustering and visualization of integrated data
bySample.cca <- bySample.cca %>%
ScaleData(verbose = FALSE) %>%
RunPCA(verbose = FALSE) %>%
FindNeighbors(reduction = "pca",
dims = seq(npcs),
force.recalc = T,
verbose=F) %>%
FindClusters(resolution = ress,
verbose=F) %>%
RunUMAP(reduction = "pca",
dims = seq(npcs),
verbose=F) %>%
RunTSNE(reduction = "pca",
dims = seq(npcs),
perplexity=100,
verbose=F)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
# Reannotate clusters
bySample.cca@meta.data <- bySample.cca@meta.data %>%
mutate(across(starts_with("integrated_"),.fns = function (x){paste0("C",x)})) %>%
mutate(across(starts_with("integrated_"),.fns = function (x){factor(x)})) %>%
mutate(across(starts_with("integrated_"),.fns = function (x){factor(x,levels=mixedsort(levels(x)))}))
saveRDS(bySample.cca,file = file.path(outputPath,paste(cellSet,subsetName,"seurat","rds",sep = ".")))
bySample.cca
An object of class Seurat
55996 features across 20326 samples within 2 assays
Active assay: integrated (27998 features, 2000 variable features)
1 other assay present: RNA
3 dimensional reductions calculated: pca, umap, tsne
clustering <- "integrated_snn_res.0.5"
p1 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = "SampleName",
cols = color.list)
p2 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T)
p1 + p2
DimPlot(bySample.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
split.by = "SampleName")
Clusters and Cells are classified based on SingleR method using Blueprint Encode, the Human Primary Cell Atlas and the Monaco Immune cell type profiles collection. This identification is not precise and should be interpreted with caution.
library(SingleR)
useAssay <- "RNA"
clustering <- "integrated_snn_res.0.5"
Idents(bySample.cca) <- clustering
bySample.cca.WT <- subset(bySample.cca,SampleName == "CD11b_WT")
nCores <- 1
immGen=celldex::ImmGenData()
snapshotDate(): 2021-05-18
see ?celldex and browseVignettes('celldex') for documentation
loading from cache
see ?celldex and browseVignettes('celldex') for documentation
loading from cache
singler <- list(
byCluster = list()
)
singler <- SingleR(
clusters = Idents(bySample.cca.WT)
, test = Seurat::Assays(bySample.cca.WT,slot = useAssay)@data
, ref = immGen
, labels = immGen$label.fine
, genes = "de"
, quantile = 0.8
, fine.tune = T
, tune.thresh = 0.05
, sd.thresh = 1
)
bySample.cca@meta.data <- bySample.cca@meta.data %>%
mutate(immGenLabels = singler[integrated_snn_res.0.5,"first.labels"],
immGenFTLabels = singler[integrated_snn_res.0.5,"labels"])
saveRDS(bySample.cca,file = file.path(outputPath,paste(cellSet,subsetName,"seurat","rds",sep=".")))
p1 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = "immGenFTLabels",
cols = color.list)
p2 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T)
p1 + p2
df <- Embeddings(bySample.cca,reduction = "tsne")
df <- cbind(df,bySample.cca@meta.data)
df <- df %>%
mutate(ManualClustering = factor(ifelse(
tSNE_1 > -25.5 & integrated_snn_res.0.5 == "C9" ,"C9b",as.character(integrated_snn_res.0.5)
))) %>%
mutate(ManualClustering = factor(ManualClustering,levels=mixedsort(levels(ManualClustering))))
bySample.cca@meta.data$ManualClustering <- df$ManualClustering
DimPlot(bySample.cca,
reduction = "tsne",
group.by = "ManualClustering",
cols = color.list,label = T,pt.size = 2)
Data obtained from Krenkel et. al. Gut 2020. Dataset GSE131834. Clusterized using standard procedures (2000 variable features and 20 PCA components for clustering and dimensionality reduction).
Final annotations to extract cell type signatures imported from original analysis.
krenelMetadata <- read.delim(
file = file.path(dataPath,"GSE131834_annotation_bonemarrow.csv"),
sep=",", header = T)
colnames(krenelMetadata) <- c("cellId", "cell_type")
krenelMetadata <- krenelMetadata %>%
mutate(cellId=sub("-",".",cellId))
rownames(krenelMetadata) <- krenelMetadata$cellId
krenkelCounts <- read.table(
file = file.path(dataPath,"GSE131834_BM_ND_WD_WT16.txt"),
sep = "\t",header=T)
krenkelCounts <- Matrix::Matrix(as.matrix(krenkelCounts))
krenkelSeurat <- CreateSeuratObject(
krenkelCounts[,rownames(krenelMetadata)],
project = "Krenkel et al",
assay = "RNA",
meta.data = krenelMetadata
)
nfeatures = 2000
npcs <- 20
ress <- c(0.5)
krenkelSeurat <- krenkelSeurat %>%
NormalizeData(
verbose = F
) %>%
FindVariableFeatures(
nfeatures=nfeatures,
verbose = F
) %>%
ScaleData(
verbose = F
) %>%
RunPCA(
verbose = F
) %>%
FindNeighbors(
reduction = "pca",
dims = seq(npcs),
force.recalc=T,
verbose = F,
) %>%
FindClusters(
resolution=ress,
verbose = F
) %>%
RunUMAP(
reduction = "pca",
dims = seq(npcs),
verbose = F
) %>%
RunTSNE(
reduction = "pca",
dims = seq(npcs),
perplexity = 100,
verbose = F
)
clustering <- "RNA_snn_res.0.5"
Idents(krenkelSeurat) <- clustering
p1 <- DimPlot(krenkelSeurat,
reduction = "tsne",
group.by = "RNA_snn_res.0.5",
cols = color.list,
label = T)
p2 <- DimPlot(krenkelSeurat,
reduction = "tsne",
group.by = "cell_type",
cols = color.list,
label = T)
p1+p2
Idents(krenkelSeurat) <- "cell_type"
markersKrenkel <- krenkelSeurat %>%
FindAllMarkers(
assay = "RNA",
test.use = "wilcox",
slot = "data",
only.pos = T,
verbose = F)
table(markersKrenkel[markersKrenkel$p_val_adj < 0.01,"cluster"])
5 CMoP II 9 HSC 6 preDC I 1 BMM I 2 BMM II
468 772 716 196 366
3 BMM III 4 CMoP I 7 preDC II 8 preDC III
267 367 210 762
top100 <- markersKrenkel %>%
filter(p_val_adj < 0.01) %>%
group_by(cluster) %>%
top_n(n = 100, wt = avg_log2FC)
krenkelSeurat <- ScaleData(krenkelSeurat,
features = unique(sort(c(VariableFeatures(krenkelSeurat),top100$gene))),
verbose = F)
DoHeatmap(krenkelSeurat, features = top100$gene) + NoLegend()
ntop <- 10
pval <- 0.01
topMarkers <- markersKrenkel %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
top_n(ntop, avg_log2FC) %>%
ungroup() %>%
dplyr::select(gene) %>%
distinct()
DotPlot(
krenkelSeurat,
features = rev(topMarkers)
) +
theme(axis.text.x = element_text(angle = 45,hjust = 1))
Gene modules selected from the top 200 marker genes at an adjusted pval < 0.01, a mÃnimum logFC of 0.25, present in our Stromal dataset and not shared between cell types.
celltypeMarkers <- markersKrenkel %>%
filter(p_val_adj < 0.01) %>%
filter(avg_log2FC > 0.25) %>%
filter(gene %in% rownames(bySample.cca)) %>%
group_by(cluster) %>%
top_n(n = 200, wt = avg_log2FC) %>%
filter(gene %in% rownames(bySample.cca)) %>%
dplyr::select(cluster,gene)
whichDup <- unique(sort(celltypeMarkers$gene[which(duplicated(celltypeMarkers$gene))]))
celltypeMarkers <- celltypeMarkers %>% filter(!gene %in% whichDup)
table(celltypeMarkers$cluster)
5 CMoP II 9 HSC 6 preDC I 1 BMM I 2 BMM II
112 137 122 86 101
3 BMM III 4 CMoP I 7 preDC II 8 preDC III
93 41 102 16
krenkelSeurat <- ScaleData(krenkelSeurat,
features = unique(sort(c(VariableFeatures(krenkelSeurat),celltypeMarkers$gene))),
verbose = F)
DoHeatmap(krenkelSeurat, features = celltypeMarkers$gene) + NoLegend()
moduleGeneListKrenkel <- celltypeMarkers %>%
group_by(cluster) %>%
group_split()
moduleGeneListKrenkel <- as.list(moduleGeneListKrenkel)
names(moduleGeneListKrenkel) <- levels(factor(celltypeMarkers$cluster))
Data obtained from Xie et. al. Nature 2020. Dataset GSE137539.
Clustering of neutrophils from wt neutrophil samples. Clustering of samples using CCA for integration and batch effect corrections (2000 variable features, 20 PCA components).
Final annotations to extract cell type signatures imported from original analysis.
xieMetadata <- read.delim(
file = file.path(dataPath,"GSE137539_wt_ctl_meta.txt"),
sep="\t", header = T)
xieMetadata <- xieMetadata %>%
mutate(cellId = rownames(.)) %>%
dplyr::select(cellId, orig.ident, cell_type, cluster)
myCountsFiles <- dir(file.path(dataPath),pattern = "GSM.+wt_ctl")
xieCounts <- lapply(myCountsFiles, function (f,dataPath) {
sname <- gsub("_",".",gsub("GSM\\d+_(wt_ctl_\\w+).+","\\1",f))
counts <- read.table(
file = gzfile(file.path(dataPath,f),open = "r"),
sep = " ",header=T)
colnames(counts) <- paste0(sname,"_",colnames(counts))
counts <- Matrix::Matrix(as.matrix(counts))
},dataPath)
xieCounts <- do.call(cbind,xieCounts)
xieCounts <- xieCounts[,rownames(xieMetadata)]
xieSeurat <- CreateSeuratObject(
counts = xieCounts,
project = "Xie et al",
assay = "RNA",
meta.data = xieMetadata,
names.delim = "_",
names.field = 1
)
xieSeurat <- xieSeurat[,!is.na(xieSeurat$cluster)]
xieSeuratList <- SplitObject(xieSeurat,split.by = "orig.ident")
xieSeuratList <- lapply(xieSeuratList, function (x) {
x <- x %>% NormalizeData(
verbose = F
) %>%
FindVariableFeatures(
nfeatures = nfeatures,
verbose = F
) %>%
ScaleData(
verbose = F
) %>%
RunPCA(
verbose = F
)
return(x)
})
commonFeatures <- SelectIntegrationFeatures(xieSeuratList,
verbose = F)
smanchors <- FindIntegrationAnchors(
xieSeuratList,
anchor.features = commonFeatures,
verbose = F
)
| | 0 % ~calculating
|+++++++++ | 17% ~03m 37s
|+++++++++++++++++ | 33% ~03m 22s
|+++++++++++++++++++++++++ | 50% ~03m 24s
|++++++++++++++++++++++++++++++++++ | 67% ~01m 54s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~54s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=05m 26s
xieSeurat.cca <- IntegrateData(anchorset = smanchors,
verbose = F)
DefaultAssay(xieSeurat.cca) <- "integrated"
# Run the standard workflow for visualization and clustering
nfeatures <- 2000
npcs <- 20
xieSeurat.cca <- xieSeurat.cca %>%
ScaleData(
verbose = F
) %>%
RunPCA(
npcs = 50,
verbose = F
) %>%
FindNeighbors(
reduction = "pca",
dims = seq(npcs),
force.recalc = T,
verbose = F
) %>%
FindClusters(
resolution = ress,
verbose = F
) %>%
RunUMAP(
reduction = "pca",
dims = seq(npcs),
verbose = F
) %>%
RunTSNE(
reduction = "pca",
dims = seq(npcs),
perplexity=100,
verbose = F
)
clustering <- "integrated_snn_res.0.5"
Idents(xieSeurat.cca) <- clustering
p1 <- DimPlot(xieSeurat.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T)
p2 <- DimPlot(xieSeurat.cca,
reduction = "tsne",
group.by = "cell_type",
cols = color.list,
label = T)
p1+p2
p1 <- DimPlot(xieSeurat.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T)
p2 <- DimPlot(xieSeurat.cca,
reduction = "tsne",
group.by = "cluster",
cols = color.list,
label = T)
p1+p2
Select samples from bone marrow and remove those left at clusters G5 which clearly comes from SP or PB.
xieSeurat.cca <- subset(xieSeurat.cca, orig.ident %in% c("wt.ctl.bm1","wt.ctl.bm2") & cluster %in% c("G0","G1","G2","G3","G4","GM"))
p1 <- DimPlot(xieSeurat.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T)
p2 <- DimPlot(xieSeurat.cca,
reduction = "tsne",
group.by = "cluster",
cols = color.list,
label = T)
p1+p2
DefaultAssay(xieSeurat.cca) <- "RNA"
Idents(xieSeurat.cca) <- "cluster"
markersXie <- xieSeurat.cca %>%
FindAllMarkers(
assay = "RNA",
test.use = "wilcox",
slot = "data",
only.pos = T,
verbose = F)
table(markersXie[markersXie$p_val_adj < 0.01,"cluster"])
G0 G4 G2 G1 G3 GM
2943 423 621 2226 153 50
top100 <- markersXie %>%
filter(p_val_adj < 0.01) %>%
group_by(cluster) %>%
top_n(n = 100, wt = avg_log2FC)
xieSeurat.cca <- ScaleData(xieSeurat.cca,
features = unique(sort(c(VariableFeatures(xieSeurat.cca),top100$gene))),
verbose = F)
DoHeatmap(xieSeurat.cca, features = top100$gene) + NoLegend()
ntop <- 10
pval <- 0.01
topMarkers <- markersXie %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
top_n(ntop, avg_log2FC) %>%
ungroup() %>%
dplyr::select(gene) %>%
distinct()
DotPlot(
xieSeurat.cca,
features = rev(topMarkers)
) +
theme(axis.text.x = element_text(angle = 45,hjust = 1))
Gene modules selected from the top 200 marker genes at an adjusted pval < 0.01, a mÃnimum logFC of 0.25, present in our Stromal dataset and not shared between cell types.
celltypeMarkers <- markersXie %>%
filter(p_val_adj < 0.01) %>%
filter(avg_log2FC > 0.25) %>%
filter(gene %in% rownames(bySample.cca)) %>%
group_by(cluster) %>%
top_n(n = 100, wt = avg_log2FC) %>%
# filter(gene %in% rownames(bySample.cca)) %>%
dplyr::select(cluster,gene) %>%
filter(cluster != "GM")
whichDup <- unique(sort(celltypeMarkers$gene[which(duplicated(celltypeMarkers$gene))]))
celltypeMarkers <- celltypeMarkers %>% filter(!gene %in% whichDup)
table(celltypeMarkers$cluster)
G0 G4 G2 G1 G3 GM
24 99 88 25 89 0
xieSeurat.cca <- ScaleData(xieSeurat.cca,
features = unique(sort(c(VariableFeatures(xieSeurat.cca),celltypeMarkers$gene))),
verbose = F)
DoHeatmap(xieSeurat.cca, features = celltypeMarkers$gene) + NoLegend()
moduleGeneListXie <- celltypeMarkers %>%
group_by(cluster) %>%
group_split()
moduleGeneListXie <- as.list(moduleGeneListXie)
names(moduleGeneListXie) <- levels(factor(celltypeMarkers$cluster))
moduleGeneList <- c(moduleGeneListKrenkel,moduleGeneListXie)
for (i in names(moduleGeneList)) {
bySample.cca <- AddModuleScore(bySample.cca
,features = list(i=moduleGeneList[[i]]$gene)
,name = paste0(i,".RNAModule")
,assay = "RNA"
,verbose = F
)
}
df <- Embeddings(bySample.cca,reduction = "tsne")
dims <- colnames(df)
df <- cbind(df,bySample.cca@meta.data)
df %>%
filter(SampleName == "CD11b_WT" & !ManualClustering %in% c("C0","C1","C2","C3","C5","C6","C8","C9","C9b","C10")) %>%
pivot_longer(cols = contains("X"), names_to = "Module", values_to = "Score") %>%
mutate(Module=sub(".RNAModule1","",Module)) %>%
ggplot(aes_string(x=dims[1],y=dims[2],color="Score")) +
geom_point() +
scale_color_gradientn(colors = colorRampPalette(c("grey","orange","red"))(3),name="Log(NormCounts)") +
facet_wrap("Module") +
theme_classic()
df <- Embeddings(bySample.cca,reduction = "tsne")
dims <- colnames(df)
df <- cbind(df,bySample.cca@meta.data)
df %>%
filter(SampleName == "CD11b_WT" & ManualClustering %in% c("C0","C1","C2","C3","C5","C6","C8","C9","C9b","C10")) %>%
pivot_longer(cols = contains(c("G0","G1","G2","G3","G4")), names_to = "Module", values_to = "Score") %>%
mutate(Module=sub(".RNAModule1","",Module)) %>%
ggplot(aes_string(x=dims[1],y=dims[2],color="Score")) +
geom_point() +
scale_color_gradientn(colors = colorRampPalette(c("grey","orange","red"))(3),name="Log(NormCounts)") +
facet_wrap("Module") +
theme_classic()
Cluster assigned to the maximum enrichment module score +- 0.02. Some clusters may score two modules or more as maximum.
Analysis based on WT transcriptome only.
clustering <- "ManualClustering"
bySample.cca@meta.data %>%
filter(SampleName == "CD11b_WT" & !ManualClustering %in% c("C0","C1","C2","C3","C5","C6","C8","C9","C9b","C10")) %>%
dplyr::select(contains(c(clustering,"Module1"))) %>%
dplyr::select(-contains(c("G0","G1","G2","G3","G4"))) %>%
pivot_longer(cols = contains("RNAModule1"),names_to = "Module",values_to = "Score") %>%
rename(Cluster=clustering) %>%
group_by(Module) %>%
summarise(Score=scale(Score,center = T,scale = T),Cluster=Cluster) %>%
group_by(Cluster,Module) %>%
summarise(AverageScore=mean(Score)) %>%
group_by(Cluster) %>%
summarise(Module=Module,AverageScoreScaled=scale(AverageScore),MaxEnrichment=(AverageScore >= (max(AverageScore)-0.02))) %>%
mutate(Module=sub(".RNAModule1","",Module)) %>%
mutate(plotBorder=ifelse(MaxEnrichment,1.5,0)) %>%
ggplot() +
geom_point(aes_string(x="Module",y="Cluster",fill="AverageScoreScaled",color="MaxEnrichment", stroke = "plotBorder"),size=6,shape=21) +
scale_fill_gradient(low = "grey90",high = "blue") +
scale_color_manual(values = c(NA,"red")) +
theme(axis.text.x = element_text(angle = 45,hjust = 1))
Note: Using an external vector in selections is ambiguous.
i Use `all_of(clustering)` instead of `clustering` to silence this message.
i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.`summarise()` has grouped output by 'Module'. You can override using the `.groups` argument.`summarise()` has grouped output by 'Cluster'. You can override using the `.groups` argument.`summarise()` has grouped output by 'Cluster'. You can override using the `.groups` argument.
# Assign Sommerkamp cell type to clusters using WT info
clustering <- "ManualClustering"
cellTypeAnnot <- bySample.cca@meta.data %>%
filter(SampleName == "CD11b_WT" & !ManualClustering %in% c("C0","C1","C2","C3","C5","C6","C8","C9","C9b","C10")) %>%
dplyr::select(contains(c(clustering,"Module1"))) %>%
dplyr::select(-contains(c("G0","G1","G2","G3","G4"))) %>%
pivot_longer(cols = contains("RNAModule1"),names_to = "Module",values_to = "Score") %>%
rename(Cluster=clustering) %>%
group_by(Module) %>%
summarise(Score=scale(Score,center = T,scale = T),Cluster=Cluster) %>%
group_by(Cluster,Module) %>%
summarise(AverageScore=mean(Score)) %>%
group_by(Cluster) %>%
summarise(Module=Module,AverageScoreScaled=scale(AverageScore),MaxEnrichment=(AverageScore >= (max(AverageScore)-0.02))) %>%
mutate(Module=sub(".RNAModule1","",Module)) %>%
mutate(Selected=ifelse(MaxEnrichment,1,0)) %>%
filter(Selected == 1) %>%
group_by(Cluster) %>%
summarise(CellType1 = paste(Module,collapse="_"))
cellTypeAnnot <- cellTypeAnnot %>%
mutate(CellType1 = sub("\\.","_",sub("^X\\d+\\.","",CellType1,perl=T)))
bySample.cca@meta.data <- bySample.cca@meta.data %>%
left_join(cellTypeAnnot, by = c("ManualClustering" = "Cluster")) %>% as.data.frame()
rownames(bySample.cca@meta.data) = colnames(bySample.cca)
p1 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T) + theme(legend.position = "none")
p2 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = "CellType1",
cols = color.list,
label = F)
p1+p2
clustering <- "ManualClustering"
bySample.cca@meta.data %>%
filter(SampleName == "CD11b_WT" & ManualClustering %in% c("C0","C1","C2","C3","C5","C6","C8","C9","C9b","C10")) %>%
dplyr::select(contains(c(clustering,"G0","G1","G2","G3","G4"))) %>%
pivot_longer(cols = contains("RNAModule1"),names_to = "Module",values_to = "Score") %>%
rename(Cluster=clustering) %>%
group_by(Module) %>%
summarise(Score=scale(Score,center = T,scale = T),Cluster=Cluster) %>%
group_by(Cluster,Module) %>%
summarise(AverageScore=mean(Score)) %>%
group_by(Cluster) %>%
summarise(Module=Module,AverageScoreScaled=scale(AverageScore),MaxEnrichment=(AverageScore >= (max(AverageScore)-0.02))) %>%
mutate(Module=sub(".RNAModule1","",Module)) %>%
mutate(plotBorder=ifelse(MaxEnrichment,1.5,0)) %>%
ggplot() +
geom_point(aes_string(x="Module",y="Cluster",fill="AverageScoreScaled",color="MaxEnrichment", stroke = "plotBorder"),size=6,shape=21) +
scale_fill_gradient(low = "grey90",high = "blue") +
scale_color_manual(values = c(NA,"red")) +
theme(axis.text.x = element_text(angle = 45))
`summarise()` has grouped output by 'Module'. You can override using the `.groups` argument.`summarise()` has grouped output by 'Cluster'. You can override using the `.groups` argument.`summarise()` has grouped output by 'Cluster'. You can override using the `.groups` argument.
# Assign Sommerkamp cell type to clusters using WT info
clustering <- "ManualClustering"
cellTypeAnnot <- bySample.cca@meta.data %>%
filter(SampleName == "CD11b_WT" & ManualClustering %in% c("C0","C1","C2","C3","C5","C6","C8","C9","C9b","C10")) %>%
dplyr::select(contains(c(clustering,"G0","G1","G2","G3","G4"))) %>%
pivot_longer(cols = contains("RNAModule1"),names_to = "Module",values_to = "Score") %>%
rename(Cluster=clustering) %>%
group_by(Module) %>%
summarise(Score=scale(Score,center = T,scale = T),Cluster=Cluster) %>%
group_by(Cluster,Module) %>%
summarise(AverageScore=mean(Score)) %>%
group_by(Cluster) %>%
summarise(Module=Module,AverageScoreScaled=scale(AverageScore),MaxEnrichment=(AverageScore >= (max(AverageScore)-0.02))) %>%
mutate(Module=sub(".RNAModule1","",Module)) %>%
mutate(Selected=ifelse(MaxEnrichment,1,0)) %>%
filter(Selected == 1) %>%
group_by(Cluster) %>%
summarise(CellType2 = paste(Module,collapse="_"))
bySample.cca@meta.data <- bySample.cca@meta.data %>%
left_join(cellTypeAnnot, by = c("ManualClustering" = "Cluster")) %>% as.data.frame()
rownames(bySample.cca@meta.data) = colnames(bySample.cca)
p1 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T) + theme(legend.position = "none")
p2 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = "CellType2",
cols = color.list,
label = F)
p1+p2
bySample.cca@meta.data <- bySample.cca@meta.data %>%
mutate(CellType1 = ifelse(is.na(CellType1),CellType2,CellType1)) %>% dplyr::select(-CellType2)
p1 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = clustering,
cols = color.list,
label = T) + theme(legend.position = "none")
p2 <- DimPlot(bySample.cca,
reduction = "tsne",
group.by = "CellType1",
cols = color.list,
label = F)
p1+p2
DimPlot(bySample.cca,
reduction = "tsne",
group.by = "CellType1",
cols = color.list,
split.by = "SampleName")
bySample.cca@meta.data %>%
ggplot(aes(x="",fill=CellType1)) +
geom_bar(stat="count",position="fill") +
coord_polar("y",start = 0) +
scale_fill_manual(values = color.list) +
facet_wrap("SampleName") +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_blank())
bySample.cca@meta.data %>%
group_by(SampleName) %>%
count(
CellType1
) %>%
pivot_wider(id_cols = SampleName, names_from = CellType1, values_from = n)
Based on WT transcriptomes.
clustering <- "CellType1"
Idents(bySample.cca) <- clustering
minPct <- 30
pval <- 0.01
useAssay <- "RNA"
method <- "wilcox"
DefaultAssay(bySample.cca) <- useAssay
bySample.cca.WT <- subset(bySample.cca, SampleName == "CD11b_WT")
markers <- bySample.cca.WT %>%
FindAllMarkers(
assay = useAssay
, slot = "data"
, min.pct = minPct/100
, return.thresh = pval
, test.use = method
, verbose = F
, only.pos = T
)
markers %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
count()
ntop <- 30
topMarkers <- markers %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
top_n(ntop, avg_log2FC)
bySample.cca <- bySample.cca %>%
ScaleData(features = c(VariableFeatures(.),topMarkers$gene),
assay = useAssay,
verbose = F)
bySample.cca %>%
DoHeatmap(assay = useAssay,
features = topMarkers$gene,
group.colors = color.list)
ntop <- 9
for (myCluster in levels(markers$cluster)) {
topMarkers <- markers %>%
filter(p_val_adj < pval & cluster == myCluster) %>%
top_n(ntop, avg_log2FC)
print(bySample.cca %>%
FeaturePlot(features = topMarkers$gene,
reduction = "tsne") +
NoLegend() +
plot_annotation(
title = paste("Markers for cell type:",myCluster
)
))
}
ntop <- 10
pvalcut <- 0.01
topMarkers <- markers %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
top_n(ntop, avg_log2FC) %>%
ungroup() %>%
dplyr::select(gene) %>%
distinct()
DotPlot(
bySample.cca,
assay = useAssay,
group.by = clustering,
features = rev(topMarkers)
) +
theme(axis.text.x = element_text(angle = 45,hjust = 1))
Within each Cell Type we have tested the differences between KO and WT conditions.
tde.files <- NULL
contrastByCond <- list()
useAssay <- "RNA"
clustering <- "CellType1"
testUse <- "wilcox"
minPct <- 0.1
cond1 <- "CD11b_KO"
cond2 <- "CD11b_WT"
DefaultAssay(bySample.cca) <- useAssay
bySample.cca <- bySample.cca %>%
SetIdent(value = paste(bySample.cca$CellType1,bySample.cca$SampleName,sep="."))
deGenes <- list()
for (myCluster in levels(factor(bySample.cca$CellType1))) {
ident1 <- paste(myCluster,cond1,sep=".")
ident2 <- paste(myCluster,cond2,sep=".")
deGenes[[myCluster]] <- bySample.cca %>%
FindMarkers(
assay = useAssay
, slot = "data"
, ident.1 = ident1
, ident.2 = ident2
, min.pct = minPct
, test.use = testUse
, verbose = F
) %>%
mutate(gene = rownames(.),
cluster = myCluster)
}
Idents(bySample.cca) <- clustering
pval <- 0.01
deGenes <- do.call(rbind,deGenes)
deGenes %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
count()
ntop <- 10
pvalcut <- 0.01
topDeGenes <- deGenes %>%
filter(p_val_adj < pval) %>%
group_by(cluster) %>%
top_n(ntop, avg_log2FC) %>%
ungroup() %>%
dplyr::select(gene) %>%
distinct()
DotPlot(
bySample.cca,
assay = "RNA",
group.by = clustering,
features = rev(topDeGenes),
split.by = "SampleName"
) +
theme(axis.text.x = element_text(angle = 45,hjust = 1))
df <- Embeddings(bySample.cca,reduction = "tsne")
dnames <- colnames(df)
df <- cbind(df,bySample.cca@meta.data)
df <- cbind(df,FetchData(bySample.cca,vars = c("Il1b","Il1rn")))
df %>%
arrange(Il1b) %>%
ggplot(aes_string(x=dnames[1],y=dnames[2])) +
geom_point(aes_string(color="Il1b")) +
facet_wrap("SampleName") +
ggtitle("Il1b") +
scale_color_gradientn(colors = colorRampPalette(c("grey","orange","red"))(3),name="Log(NormCounts)") +
theme_classic()
df %>%
arrange(Il1rn) %>%
ggplot(aes_string(x=dnames[1],y=dnames[2])) +
geom_point(aes_string(color="Il1rn")) +
facet_wrap("SampleName") +
ggtitle("Il1rn") +
scale_color_gradientn(colors = colorRampPalette(c("grey","orange","red"))(3),name="Log(NormCounts)") +
theme_classic()
df %>%
mutate(Il1Exp = case_when(
Il1b>0 & Il1rn == 0 ~ "Il1b only",
Il1b==0 & Il1rn > 0 ~ "Il1rn only",
Il1b>0 & Il1rn > 0 ~ "Il1b and Il1rn")) %>%
ggplot(aes(x="",fill=Il1Exp)) +
geom_bar(stat="count",position="fill") +
coord_polar("y",start = 0) +
scale_fill_manual(values = color.list) +
facet_wrap(c("SampleName","CellType1"),ncol = 6) +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_blank())
p <- df %>%
filter(SampleName == "CD11b_WT") %>%
mutate(Il1Exp = case_when(
Il1b>0 & Il1rn == 0 ~ "Il1b only",
Il1b==0 & Il1rn > 0 ~ "Il1rn only",
Il1b>0 & Il1rn > 0 ~ "Il1b and Il1rn")) %>%
ggplot(aes(x=Il1rn,y=Il1b)) +
ggtitle("Myeloid WT Cells") +
geom_point() +
theme_classic()
ggMarginal(p,type = "densigram")
selectedGenes <- c("Lyn","Hif1a","Lmo4","Csf2rb","Myd88","Cxcr2","Nfkbia","Cebpb")
df <- FetchData(bySample.cca,vars = selectedGenes)
df <- cbind(df,bySample.cca@meta.data)
df <- df %>% mutate(SampleName=factor(SampleName,levels = c("CD11b_WT","CD11b_KO")))
library(stringr)
pList <- list()
for (gene in selectedGenes) {
pList[[length(pList)+1]] <- ggplot(df,aes_string(y=gene,x="SampleName",color="SampleName",fill="SampleName")) +
geom_violin(stat = "ydensity",alpha=0.2) +
# geom_jitter(alpha=1) +
scale_color_manual(values = c("grey40","#a80a0b")) +
scale_fill_manual(values = c("grey40","#a80a0b")) +
stat_summary(fun = "mean",
geom = "crossbar",
width = 0.5,
colour = "black") +
ylab("Expression Level log(NormCounts+1)") +
ggtitle(str_to_title(gene)) +
facet_wrap("CellType1",nrow = 1) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90),legend.position = "none")
}
p <- lapply(pList,plot)
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)
Matrix products: default
locale:
[1] LC_COLLATE=English_United Kingdom.1252
[2] LC_CTYPE=English_United Kingdom.1252
[3] LC_MONETARY=English_United Kingdom.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United Kingdom.1252
attached base packages:
[1] parallel stats4 stats graphics grDevices utils
[7] datasets methods base
other attached packages:
[1] celldex_1.2.0 SingleR_1.6.1
[3] patchwork_1.1.1 ggridges_0.5.3
[5] ggExtra_0.10.0 gtools_3.9.2
[7] circlize_0.4.13 openxlsx_4.2.4
[9] RColorBrewer_1.1-2 forcats_0.5.1
[11] stringr_1.4.0 dplyr_1.0.8
[13] purrr_0.3.4 readr_2.1.2
[15] tidyr_1.2.0 tibble_3.1.6
[17] ggplot2_3.3.5 tidyverse_1.3.1
[19] scDblFinder_1.6.0 scran_1.20.1
[21] scuttle_1.2.1 SingleCellExperiment_1.14.1
[23] SummarizedExperiment_1.22.0 Biobase_2.52.0
[25] GenomicRanges_1.44.0 GenomeInfoDb_1.28.4
[27] IRanges_2.26.0 S4Vectors_0.30.2
[29] BiocGenerics_0.38.0 MatrixGenerics_1.4.3
[31] matrixStats_0.61.0 SeuratObject_4.0.2
[33] Seurat_4.0.5
loaded via a namespace (and not attached):
[1] utf8_1.2.2
[2] reticulate_1.22
[3] tidyselect_1.1.2
[4] RSQLite_2.2.8
[5] AnnotationDbi_1.54.1
[6] htmlwidgets_1.5.4
[7] grid_4.1.1
[8] BiocParallel_1.26.2
[9] Rtsne_0.15
[10] munsell_0.5.0
[11] ScaledMatrix_1.0.0
[12] codetools_0.2-18
[13] ica_1.0-2
[14] statmod_1.4.37
[15] xgboost_1.6.0.1
[16] future_1.22.1
[17] miniUI_0.1.1.1
[18] withr_2.5.0
[19] colorspace_2.0-2
[20] filelock_1.0.2
[21] knitr_1.36
[22] rstudioapi_0.13
[23] ROCR_1.0-11
[24] tensor_1.5
[25] listenv_0.8.0
[26] labeling_0.4.2
[27] GenomeInfoDbData_1.2.6
[28] polyclip_1.10-0
[29] farver_2.1.0
[30] bit64_4.0.5
[31] parallelly_1.28.1
[32] vctrs_0.4.0
[33] generics_0.1.2
[34] xfun_0.27
[35] BiocFileCache_2.0.0
[36] R6_2.5.1
[37] ggbeeswarm_0.6.0
[38] rsvd_1.0.5
[39] locfit_1.5-9.4
[40] bitops_1.0-7
[41] spatstat.utils_2.2-0
[42] cachem_1.0.6
[43] DelayedArray_0.18.0
[44] assertthat_0.2.1
[45] promises_1.2.0.1
[46] scales_1.1.1
[47] beeswarm_0.4.0
[48] gtable_0.3.0
[49] beachmat_2.8.1
[50] globals_0.14.0
[51] goftest_1.2-3
[52] rlang_1.0.2
[53] GlobalOptions_0.1.2
[54] splines_4.1.1
[55] lazyeval_0.2.2
[56] broom_0.7.9
[57] spatstat.geom_2.3-0
[58] modelr_0.1.8
[59] BiocManager_1.30.16
[60] yaml_2.2.1
[61] reshape2_1.4.4
[62] abind_1.4-5
[63] backports_1.3.0
[64] httpuv_1.6.3
[65] tools_4.1.1
[66] ellipsis_0.3.2
[67] spatstat.core_2.3-0
[68] Rcpp_1.0.7
[69] plyr_1.8.6
[70] sparseMatrixStats_1.4.2
[71] zlibbioc_1.38.0
[72] RCurl_1.98-1.5
[73] rpart_4.1-15
[74] deldir_1.0-6
[75] pbapply_1.5-0
[76] viridis_0.6.2
[77] cowplot_1.1.1
[78] zoo_1.8-9
[79] haven_2.4.3
[80] ggrepel_0.9.1
[81] cluster_2.1.2
[82] fs_1.5.0
[83] magrittr_2.0.1
[84] RSpectra_0.16-0
[85] data.table_1.14.2
[86] scattermore_0.7
[87] reprex_2.0.1
[88] lmtest_0.9-38
[89] RANN_2.6.1
[90] fitdistrplus_1.1-6
[91] hms_1.1.1
[92] mime_0.12
[93] xtable_1.8-4
[94] readxl_1.3.1
[95] shape_1.4.6
[96] gridExtra_2.3
[97] compiler_4.1.1
[98] scater_1.20.1
[99] KernSmooth_2.23-20
[100] crayon_1.5.1
[101] htmltools_0.5.2
[102] tzdb_0.3.0
[103] mgcv_1.8-36
[104] later_1.3.0
[105] lubridate_1.8.0
[106] DBI_1.1.1
[107] ExperimentHub_2.0.0
[108] dbplyr_2.1.1
[109] MASS_7.3-54
[110] rappdirs_0.3.3
[111] Matrix_1.3-4
[112] cli_3.2.0
[113] metapod_1.0.0
[114] igraph_1.2.7
[115] pkgconfig_2.0.3
[116] plotly_4.10.0
[117] spatstat.sparse_2.0-0
[118] xml2_1.3.3
[119] vipor_0.4.5
[120] dqrng_0.3.0
[121] XVector_0.32.0
[122] rvest_1.0.2
[123] digest_0.6.28
[124] sctransform_0.3.2
[125] RcppAnnoy_0.0.19
[126] spatstat.data_2.1-0
[127] Biostrings_2.60.2
[128] cellranger_1.1.0
[129] leiden_0.3.9
[130] uwot_0.1.10
[131] edgeR_3.34.1
[132] DelayedMatrixStats_1.14.3
[133] curl_4.3.2
[134] shiny_1.7.1
[135] lifecycle_1.0.1
[136] nlme_3.1-152
[137] jsonlite_1.8.0
[138] BiocNeighbors_1.10.0
[139] viridisLite_0.4.0
[140] limma_3.48.3
[141] fansi_1.0.3
[142] pillar_1.7.0
[143] lattice_0.20-44
[144] KEGGREST_1.32.0
[145] fastmap_1.1.0
[146] httr_1.4.2
[147] survival_3.2-11
[148] interactiveDisplayBase_1.30.0
[149] glue_1.6.2
[150] zip_2.2.0
[151] png_0.1-7
[152] bluster_1.2.1
[153] BiocVersion_3.13.1
[154] bit_4.0.4
[155] stringi_1.7.6
[156] blob_1.2.2
[157] BiocSingular_1.8.1
[158] AnnotationHub_3.0.2
[159] memoise_2.0.0
[160] irlba_2.3.3
[161] future.apply_1.8.1